# Packages ----
# The global environment is intentionally left untouched. Clearing it with
# rm(list = ls()) would delete the caller's objects whenever this script is
# sourced from another script, and it does not reset attached packages or
# options anyway. Run the script in a fresh R session to get a clean state.
library(tidyverse)

# Load the 1996 election data and keep the mayoral races ------------
election_1996 <- read_rds(
  here::here("data", "processed", "elections", "mun_elec_1996_oldsys.rds")
)

# Only rows whose office is "PREFEITO" (mayor) are used from here on
mayor_elections <- filter(election_1996, descricao_cargo == "PREFEITO")

# Election year(s) present in the mayoral data
distinct(mayor_elections, ano_eleicao)

# Number of municipalities: all offices first, mayoral races second
nrow(distinct(election_1996, mun_code))
nrow(distinct(mayor_elections, mun_code))
# Note: Juazeiro do Norte (CE) has no information for mayor (checked in the source)

# Tie correction: the ties listed here were found by the rank checks further
# down. The elected candidate receives one extra vote so the tie disappears.
mayor_elections <- mutate(
  mayor_elections,
  total_votos = ifelse(
    mun_code %in% c("431244") & desc_sit_cand_tot == "ELEITO",
    total_votos + 1,
    total_votos
  )
)

# Vote share (in percent) of each candidate within office, state,
# municipality and round
mayor_elections <- mayor_elections %>%
  group_by(descricao_cargo, sigla_uf, mun_code, num_turno) %>%
  mutate(vote_share = total_votos / sum(total_votos) * 100) %>%
  ungroup()

# Municipalities whose mayoral election went to a second round -------
# which() drops rows with a missing round, as filter() would
mayor_2nd <- with(mayor_elections, mun_code[which(num_turno == 2)])

# For those municipalities keep only the second round; every other
# municipality keeps its (single) round
mayor_elections <- mayor_elections %>%
  filter(!(mun_code %in% mayor_2nd) | num_turno != 1)

# Number of municipalities left after dropping the first rounds
nrow(distinct(mayor_elections, mun_code))

# Rank candidates within each election by vote share (1 = most voted)
mayor_elections <- mayor_elections %>%
  group_by(ano_eleicao, mun_code) %>%
  mutate(rank = row_number(desc(vote_share))) %>%
  ungroup()

# Check 1: the elected candidate should always have rank == 1
distinct(
  filter(mayor_elections, str_detect(desc_sit_cand_tot, "^ELEITO$")),
  rank
) # FAIL

# Elected candidates that are not the most voted
fail_rank_1 <- filter(
  mayor_elections,
  str_detect(desc_sit_cand_tot, "^ELEITO$") & rank > 1
)

# Note: only one election. Checked in the system: the elected candidate has
# fewer votes than the other candidate. Since this system does not let us know
# whether the other candidate was considered irregular, we exclude this
# observation (the whole municipality).
mayor_elections <- filter(
  mayor_elections,
  !(mun_code %in% fail_rank_1$mun_code)
)

# Check that the exclusion worked (expect no rows)
filter(
  mayor_elections,
  str_detect(desc_sit_cand_tot, "^ELEITO$") & rank > 1
)

# Check 2: a non-elected candidate should always have rank > 1
distinct(
  filter(mayor_elections, str_detect(desc_sit_cand_tot, "^NAO ELEITO$")),
  rank
) # FAIL

# Non-elected candidates that are nevertheless the most voted
fail_rank_greater1 <- filter(
  mayor_elections,
  str_detect(desc_sit_cand_tot, "^NAO ELEITO$") & rank == 1
)
# Note: only one election. Checked in the system: both candidates who ran in
# this election have situation = "Non-elected". We exclude this observation
# because we cannot check whether the election was irregular or had other
# problems.
mayor_elections <- filter(
  mayor_elections,
  !(mun_code %in% fail_rank_greater1$mun_code)
)

# Check that the exclusion worked (expect no rows)
filter(
  mayor_elections,
  str_detect(desc_sit_cand_tot, "^NAO ELEITO$") & rank == 1
)

# Left-wing party indicator -------------------
party_classification <- readxl::read_excel(
  here::here("data", "raw", "party_classification.xls")
)

# Parties classified as left (for visual inspection)
left_parties <- filter(party_classification, left_dummy == 1)

# Parties that appear in the election data but not in the classification
distinct(
  anti_join(mayor_elections, party_classification, by = c("sigla_partido" = "party")),
  sigla_partido
)

# Attach the classification to every candidate; the party's `left_dummy`
# becomes the candidate-level `mayor_left`
mayor_elections <- mayor_elections %>%
  left_join(party_classification, by = c("sigla_partido" = "party")) %>%
  rename(mayor_left = left_dummy)

# Check the merged classification against the spreadsheet: count the
# (left_dummy, party) pairs in the election data that are absent from it
mayor_elections %>%
  rename(left_dummy = mayor_left, party = sigla_partido) %>%
  distinct(left_dummy, party) %>%
  setdiff(party_classification) %>%
  nrow() # OK, test passes (all parties classified according to the xls file)

# Runoff indicator: 1 when the row comes from a second round, 0 otherwise
mayor_elections <- mutate(mayor_elections, runoff = as.numeric(num_turno == 2))

# Cross-check the indicator against the round number
distinct(mayor_elections, runoff, num_turno)

# Create Margin of Victory ----

# Reshape to wide format: one row per election (year, municipality, state,
# runoff) and one column per rank/variable pair, named `rank_<k>_<variable>`.
#
# Steps:
#   1. drop the columns that are not carried into the wide table;
#   2. stack every candidate-level variable into (variable, value) pairs;
#      stacking mixed column types turns all values into character;
#   3. build the `<k>_<variable>` key and spread it into columns;
#   4. convert the columns back to numeric, except those whose name contains
#      "sigla", "coliga" or "nome", which stay character.
#
# gather()/spread() are kept on purpose: a later step selects a range of the
# `rank_*` columns, which depends on the column order spread() produces.
# The deprecated mutate_at() + funs() pair is replaced by across().
margin_of_victories <- mayor_elections %>%
  rename(total_votes = total_votos) %>%
  select(-descricao_cargo, -numero_cand, -desc_sit_cand_tot, -num_turno) %>%
  relocate(ano_eleicao, mun_code, mun_name, sigla_uf, runoff, rank) %>%
  gather(variable, value, -(ano_eleicao:rank)) %>%
  unite(rank, rank, variable) %>%
  spread(rank, value, sep = "_") %>%
  mutate(across(
    c(matches("rank"), -matches("sigla|coliga|nome")),
    as.numeric
  ))

# Rank of the best opposing candidate and margin of victory ----

# Add `rank_best_opposition` and `margin_mayor_left` to the wide election table.
#
# An "opposing" candidate is one whose `mayor_left` value differs from the
# winner's (rank 1). Candidates are scanned in rank order, starting at rank 2.
#
# Arguments:
#   wide  data frame with one row per election and, for every finishing
#         position k, the numeric columns `rank_<k>_mayor_left` and
#         `rank_<k>_vote_share`.
#
# Returns `wide` with two columns appended (or overwritten if present):
#   rank_best_opposition  lowest k > 1 whose `mayor_left` differs from rank 1.
#                         NA when a missing `mayor_left` is met before an
#                         opposing candidate is found (no further candidate,
#                         or unclassified party), and NA when no candidate
#                         opposes the winner at all.
#   margin_mayor_left     winner's vote share minus that opponent's vote
#                         share; the sign is flipped when the winner is not
#                         left-wing, so positive values mean a left-wing win.
#
# The finishing positions are read from the column names, so the code does not
# depend on a fixed maximum number of candidates, and the last position is
# checked like every other one instead of being assumed to be an opponent.
add_margin_mayor_left <- function(wide) {
  stopifnot(
    "rank_1_mayor_left" %in% names(wide),
    "rank_1_vote_share" %in% names(wide)
  )

  # Finishing positions available in the table, in increasing order
  left_cols <- grep("^rank_[0-9]+_mayor_left$", names(wide), value = TRUE)
  positions <- sort(as.integer(sub("^rank_([0-9]+)_mayor_left$", "\\1", left_cols)))
  challengers <- positions[positions > 1L]

  winner_left <- wide[["rank_1_mayor_left"]]
  n_rows <- nrow(wide)
  best_rank <- rep(NA_real_, n_rows)
  opponent_share <- rep(NA_real_, n_rows)
  # TRUE while a row has only met candidates on the winner's side
  pending <- rep(TRUE, n_rows)

  for (k in challengers) {
    differs <- winner_left != wide[[paste0("rank_", k, "_mayor_left")]]
    found <- pending & !is.na(differs) & differs
    best_rank[found] <- k
    opponent_share[found] <- wide[[paste0("rank_", k, "_vote_share")]][found]
    # A missing comparison closes the row with NA, exactly as a nested
    # ifelse() would; only rows with a definite "same side" keep searching.
    pending <- pending & !is.na(differs) & !differs
  }

  margin <- wide[["rank_1_vote_share"]] - opponent_share
  wide[["rank_best_opposition"]] <- best_rank
  wide[["margin_mayor_left"]] <- ifelse(winner_left == 1, margin, -margin)
  wide
}

margin_of_victories <- add_margin_mayor_left(margin_of_victories)

# Checks on the margin of victory ----

# Ranks at which the best opposing candidate was found
margin_of_victories %>%
  distinct(rank_best_opposition)

# Highest of those ranks (TRUE spelled out: `T` can be reassigned)
margin_of_victories %>%
  summarise(max_rank = max(rank_best_opposition, na.rm = TRUE))

# Elections with a missing margin, for inspection
# (add `distinct(rank_best_opposition)` to see only their opposition ranks)
margin_of_victories %>%
  filter(is.na(margin_mayor_left))

# Positive margins should only occur with a left-wing winner
# (rank_1_mayor_left == 1) ...
margin_of_victories %>%
  filter(margin_mayor_left > 0) %>%
  distinct(rank_1_mayor_left)

# ... and negative margins only with a non-left winner (rank_1_mayor_left == 0)
margin_of_victories %>%
  filter(margin_mayor_left < 0) %>%
  distinct(rank_1_mayor_left)

# Organize variable order: move the block of columns running from
# rank_10_composicao_coligacao to rank_11_vote_share right after the margin
margin_of_victories_1996 <- relocate(
  margin_of_victories,
  rank_10_composicao_coligacao:rank_11_vote_share,
  .after = margin_mayor_left
)

# Save RDS -----
output_path <- here::here("data", "processed", "elections", "margin_of_victories_1996.rds")
write_rds(margin_of_victories_1996, output_path)

